import requests
import lxml.etree as le
import pandas as pd

# Scrape the left-hand navigation menu of the runoob HTML tutorial page and
# save every entry's title and link to an Excel sheet.
url = 'https://www.runoob.com/html/html-tutorial.html'
# Select the <a> elements themselves rather than their text()/@href in two
# separate queries, so each title is always paired with its own link.
anchor_xpath = '//div[@id="leftcolumn"]/a'

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops an HTTP error page from being parsed as if it
# were the tutorial.
response = requests.get(url=url, timeout=10)
response.raise_for_status()
content = response.content

# Parse the document once and reuse the resulting anchor elements.
anchors = le.HTML(content).xpath(anchor_xpath)

# An anchor's direct text nodes are joined into a single title and a missing
# href becomes '', so both columns always have one entry per anchor and the
# DataFrame construction cannot fail on a length mismatch.
data = [''.join(anchor.xpath('text()')).strip() for anchor in anchors]
data2 = [anchor.get('href', '').strip() for anchor in anchors]

k = pd.DataFrame({'目录': data, '链接': data2})
print(k)
# NOTE(review): writing the legacy '.xls' format relies on the xlwt engine,
# which recent pandas releases no longer ship - if this call fails with
# "No engine for filetype", switch the extension to '.xlsx'. Confirm against
# the pandas version in use.
k.to_excel('./spider homework.xls', sheet_name='爬虫作业', header=True, index=False)